Abstract
Background In the report naomi-simple_fit with parameter tmbstan = TRUE, we used the NUTS algorithm to perform MCMC inference for the simplified Naomi model.
Task Here we assess whether or not the results of the MCMC are suitable using a range of diagnostic tools.
We start by obtaining results from the latest version of naomi-simple_fit with tmbstan = TRUE.
out <- readRDS("depends/out.rds")
mcmc <- out$mcmc$stanfit
This MCMC took 3.28 days to run
bayesplot::color_scheme_set("viridisA")
ggplot2::theme_set(theme_minimal())
We are looking for values of \(\hat R\) less than 1.1 here.
rhats <- bayesplot::rhat(mcmc)
bayesplot::mcmc_rhat(rhats)
(big_rhats <- rhats[rhats > 1.1])
## named numeric(0)
length(big_rhats) / length(rhats)
## [1] 0
Reasonable to be worried about values less than 0.1 here.
ratios <- bayesplot::neff_ratio(mcmc)
bayesplot::mcmc_neff(ratios)
What are the total effective sample sizes?
#' I think that this $summary should be all of the chains grouped together
mcmc_summary <- summary(mcmc)$summary
data.frame(mcmc_summary) %>%
tibble::rownames_to_column("param") %>%
ggplot(aes(x = n_eff)) +
geom_histogram(alpha = 0.8) +
labs(x = "ESS", y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
How much autocorrelation is there in the chains?
bayesplot::mcmc_acf(mcmc, pars = vars(starts_with("beta")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("beta")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("logit")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("log_sigma")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_rho_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_rho_xs[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("us_rho_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("us_rho_xs[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_rho_a[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_rho_as[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_alpha_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_alpha_xs[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("us_alpha_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("us_alpha_xs[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_alpha_a[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_alpha_as[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("u_alpha_xa[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("ui_lambda_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("ui_anc_rho_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("ui_anc_alpha_x[")))
bayesplot::mcmc_trace(mcmc, pars = vars(starts_with("log_or_gamma["))) #' N.B. these are from the ANC attendance model
There is a prior suspicion (from Jeff, Tim, Rachel) that the ART attendance model is unidentifiable.
Let’s have a look at the pairs plot for neighbouring districts and the log_or_gamma parameter.
area_merged <- sf::read_sf(system.file("extdata/demo_areas.geojson", package = "naomi"))
nb <- area_merged %>%
filter(area_level == max(area_level)) %>%
bsae::sf_to_nb()
neighbours_pairs_plot <- function(par, i) {
neighbour_pars <- paste0(par, "[", c(i, nb[[i]]), "]")
bayesplot::mcmc_pairs(mcmc, pars = neighbour_pars, diag_fun = "hist", off_diag_fun = "hex")
}
# area_merged %>%
# filter(area_level == max(area_level)) %>%
# print(n = Inf)
Here are Nkhata Bay and neighbours:
neighbours_pairs_plot("log_or_gamma", 5)
And here are Blantyre and neighbours:
neighbours_pairs_plot("log_or_gamma", 26)
np <- bayesplot::nuts_params(mcmc)
Are there any divergent transitions?
np %>%
filter(Parameter == "divergent__") %>%
summarise(n_divergent = sum(Value))
bayesplot::mcmc_nuts_divergence(np, bayesplot::log_posterior(mcmc))
We can also use energy plots (Betancourt 2017): ideally these two histograms would be the same When the histograms are quite different, it may suggest the chains are not fully exploring the tails of the target distribution.
bayesplot::mcmc_nuts_energy(np)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 13.3.1
##
## Matrix products: default
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] multi.utils_0.1.0 sf_1.0-9 bayesplot_1.9.0 rstan_2.21.5 StanHeaders_2.21.0-7 stringr_1.5.0 purrr_1.0.1 tidyr_1.2.1 readr_2.1.3 ggplot2_3.4.0
## [11] forcats_0.5.2 dplyr_1.0.10 INLA_22.05.07 sp_1.5-1 foreach_1.5.2 Matrix_1.5-1
##
## loaded via a namespace (and not attached):
## [1] uuid_1.1-0 backports_1.4.1 tmbstan_1.0.4 plyr_1.8.8 queuer_0.3.0 TMB_1.9.2 splines_4.2.0 storr_1.2.5 inline_0.3.19 digest_0.6.31
## [11] htmltools_0.5.3 fansi_1.0.4 magrittr_2.0.3 checkmate_2.1.0 memoise_2.0.1 naomi_2.8.5 tzdb_0.3.0 RcppParallel_5.1.5 matrixStats_0.62.0 conan_0.1.1
## [21] askpass_1.1 lpSolve_5.6.15 prettyunits_1.1.1 colorspace_2.0-3 blob_1.2.3 xfun_0.37 hexbin_1.28.2 callr_3.7.3 crayon_1.5.2 jsonlite_1.8.4
## [31] didehpc_0.3.18 iterators_1.0.14 glue_1.6.2 inf.utils_0.1.0 gtable_0.3.1 MatrixModels_0.5-0 V8_4.2.2 distributional_0.3.0 pkgbuild_1.3.1 traduire_0.0.6
## [41] abind_1.4-5 scales_1.2.1 mvtnorm_1.1-3 DBI_1.1.3 Rcpp_1.0.10 spData_2.2.1 units_0.8-0 bit_4.0.5 spdep_1.2-7 proxy_0.4-27
## [51] stats4_4.2.0 httr_1.4.4 wk_0.7.0 posterior_1.2.2 ellipsis_0.3.2 pkgconfig_2.0.3 loo_2.5.1 context_0.3.0 farver_2.1.1 sass_0.4.4
## [61] deldir_1.0-6 utf8_1.2.3 tidyselect_1.2.0 labeling_0.4.2 rlang_1.1.0 reshape2_1.4.4 munsell_0.5.0 tools_4.2.0 cachem_1.0.6 bsae_0.2.7
## [71] cli_3.6.1 generics_0.1.3 RSQLite_2.2.14 ggridges_0.5.3 evaluate_0.20 fastmap_1.1.0 yaml_2.3.7 processx_3.8.0 knitr_1.41 bit64_4.0.5
## [81] fs_1.6.1 zip_2.2.2 s2_1.1.1 orderly_1.4.3 compiler_4.2.0 rstudioapi_0.14 filelock_1.0.2 curl_5.0.0 e1071_1.7-12 tibble_3.2.1
## [91] statmod_1.4.36 bslib_0.4.1 stringi_1.7.8 highr_0.9 ids_1.0.1 ps_1.7.3 desc_1.4.2 lattice_0.20-45 classInt_0.4-8 mvQuad_1.0-6
## [101] tensorA_0.36.2 vctrs_0.6.1 pillar_1.9.0 lifecycle_1.0.3 jquerylib_0.1.4 data.table_1.14.6 R6_2.5.1 bookdown_0.26 KernSmooth_2.23-20 aghq_0.4.0
## [111] gridExtra_2.3 codetools_0.2-18 boot_1.3-28 assertthat_0.2.1 openssl_2.0.5 rprojroot_2.0.3 withr_2.5.0 hms_1.1.2 grid_4.2.0 class_7.3-20
## [121] rmarkdown_2.18 getPass_0.2-2 pkgdepends_0.3.1 rematch_1.0.1